# -*- coding:utf8 -*-
# !/usr/bin/env python

'''
#全国企业信用信息公示系统（北京）
#维护黄羽
'''

import re
import urllib2
from bs4 import BeautifulSoup
from utils import kill_captcha
from scpy.request_util import *
# from request_util import *
from parse_util.parse_basesic import parse_basesic
from scpy.logger import get_logger
from table import index, report_index, money_notclean, table_clean, parse_time
from table import tr as clean_tr
from table import td_clean as clean_td
import datetime, time
import json
import copy

import sys, os

# Python 2 hack: reload(sys) re-exposes setdefaultencoding (removed by
# site.py) so the process-wide default text encoding can be forced to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')

logger = get_logger(__file__)

# Company name to search for on the Beijing enterprise credit site.
companyName = '北京百度糯米信息技术有限公司'

# Landing page of the Beijing enterprise credit information system.
index_url = 'http://qyxy.baic.gov.cn/beijing'

# Browser-like headers for the landing-page request. The Referer points at the
# national site (gsxt.saic.gov.cn), mimicking how a real user reaches the
# Beijing portal.
index_header = dict([
    ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'),
    ('Accept-Encoding', 'gzip, deflate, sdch'),
    ('Accept-Language', 'zh-CN,zh;q=0.8'),
    ('Cache-Control', 'max-age=0'),
    ('Connection', 'keep-alive'),
    ('Host', 'qyxy.baic.gov.cn'),
    ('Referer', 'http://gsxt.saic.gov.cn/'),
    ('Upgrade-Insecure-Requests', '1'),
    ('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36'),
])

# Warm-up request: hit the landing page to obtain the session cookie and the
# anti-scraping tokens embedded in the HTML.
index_req = RequestUtil()
# index_req.set_hreaders(index_header)
index_res = index_req.make_request(index_url, timeout=200)

# Flatten the Set-Cookie response header into a single Cookie value that can
# be replayed on later requests (stripping "path=/", commas and spaces leaves
# only the name=value pairs).
index_set_cookie = index_res.headers['set-cookie']
index_set_cookie = index_set_cookie.replace("path=/", "").replace(",", "").replace(" ", "")

print 'index_set_cookie', index_set_cookie

index_html = index_res.content
# Non-empty match means the site uses the character-captcha servlet
# ("CheckCodeCaptcha"); empty means the arithmetic variant is in use.
check_code_servlet_name = re.findall('var checkCodeServletName = "CheckCodeCaptcha', index_html)
print 'check_code_servlet_name', check_code_servlet_name
# import pdb
# pdb.set_trace()

current_time_millis = re.findall(
    '<input type="hidden" name="currentTimeMillis" id="currentTimeMillis" value="(\d+?)"/>', index_html)
credit_ticket = re.findall('<input type="hidden" name="credit_ticket" id="credit_ticket" value="(.*?)"/>', index_html)
print current_time_millis
print credit_ticket
if current_time_millis and credit_ticket:
    current_time_millis = current_time_millis[0]
    credit_ticket = credit_ticket[0]
else:
    logger.error("网页发生变化！")
    raise Exception("网页发生变化！")

# import pdb
# pdb.set_trace()


# img_url = 'http://qyxy.baic.gov.cn/CheckCodeCaptcha?currentTimeMillis='+current_time_millis
# Choose the captcha endpoint matching the detected site variant:
# character captcha ("bj1" site) vs. arithmetic captcha ("bj" site).
_captcha_servlet = 'CheckCodeCaptcha' if check_code_servlet_name else 'CheckCodeYunSuan'
img_url = 'http://qyxy.baic.gov.cn/' + _captcha_servlet + '?currentTimeMillis=' + current_time_millis

# Browser-like headers for downloading the captcha image; the session cookie
# captured from the landing page must be replayed so the captcha is tied to
# our session.
img_headers = dict([
    ('Accept', 'image/webp,image/*,*/*;q=0.8'),
    ('Accept-Encoding', 'gzip, deflate, sdch'),
    ('Accept-Language', 'zh-CN,zh;q=0.8'),
    ('Connection', 'keep-alive'),
    ('Cookie', index_set_cookie),
    ('Host', 'qyxy.baic.gov.cn'),
    ('Referer', 'http://qyxy.baic.gov.cn/beijing'),
    ('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36'),
])

# img_req = RequestUtil()
# Reuse the landing-page session so the captcha belongs to our cookie.
img_req = index_req
img_req.set_hreaders(img_headers)
img_res = img_req.make_request(img_url, timeout=200)

# img_set_cookie = img_res.headers['set-cookie']
# img_set_cookie = img_set_cookie.replace("path=/", "").replace(",", "")
captcha = img_res.content  # raw image bytes of the captcha

# print 'img_set_cookie', img_set_cookie

# Save a local copy for debugging / manual inspection.
with open('./bj1.jpg', 'wb') as fp:
    fp.write(captcha)

# Ask the external captcha-cracking service to solve the downloaded image.
try:
    res_code = kill_captcha(captcha, 'bj1', 'jpeg')
except Exception as e:  # modernized from py2-only `except Exception, e`
    logger.error("破解验证码的服务出现异常")
    logger.error(e)
    raise  # bare raise preserves the original traceback
# BUG FIX: the original condition was `not res_code and len(res_code) > 100`,
# which can never be true (an empty result has length 0). `or` was intended:
# an empty answer or an absurdly long one both mean the crack failed.
if not res_code or len(res_code) > 100:
    logger.info('验证码为:%s' % res_code)
    logger.error("破解验证码的服务出现异常,可能是下载的验证码错误，也可能破解服务出现异常！")
    # return ''   # would signal "retry" if this were wrapped in a function
else:
    logger.info('验证码为:%s' % res_code)

# Manual override for debugging: type the captcha answer by hand.
res_code = raw_input('code=')

# Endpoints of the query-credit action servlet, all sharing one URL template.
_ACTION_URL = 'http://qyxy.baic.gov.cn/gjjbj/gjjQueryCreditAction!%s.dhtml'
check_url = _ACTION_URL % 'checkCode'          # verify the captcha answer
literal_url = _ACTION_URL % 'findLiteralWord'  # keyword-suggestion lookup
com_list_url = _ACTION_URL % 'getBjQyList'     # company search-result list

# Headers for the captcha-verification POST.
check_headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    # NOTE(review): hard-coded Content-Length looks fragile — the real body
    # size varies with the captcha answer; confirm the HTTP layer recomputes it.
    'Content-Length': '135',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Cookie': index_set_cookie,
    'Host': 'qyxy.baic.gov.cn',
    'Origin': 'http://qyxy.baic.gov.cn',
    'Referer': 'http://qyxy.baic.gov.cn/beijing',
    # 'Referer':'http://qyxy.baic.gov.cn/gjjbj/gjjQueryCreditAction!toIndex.dhtml',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36'
}

# Form fields for the captcha-verification POST; the tokens must match the
# ones scraped from the landing page that produced this captcha.
check_data = {
    'currentTimeMillis': current_time_millis,
    'credit_ticket': credit_ticket,
    'checkcode': res_code,
    'keyword': companyName,
}
print 'check_data', check_data
check_req = img_req  # keep riding the same session
check_req.set_hreaders(check_headers)
# time.sleep(2)
check_res = check_req.make_request(check_url, data=check_data, method='post', timeout=100).content
# print check_res
# Interpret the captcha-check response: 'success' and 'fail' are the two
# expected answers; anything else means the page layout changed.
if check_res == 'success':
    pass
elif check_res == 'fail':
    # BUG FIX: the original raised "网页发生变化！" (page changed) here, which
    # contradicted its own log line — the captcha answer was wrong, not the
    # page. Raise a message consistent with the log. (Also removed a dead
    # `pass` that followed the raise.)
    logger.error("验证码破解错误，重复破解！")
    # return ''  # would signal "retry" if this were wrapped in a function
    raise Exception("验证码破解错误，重复破解！")
else:
    logger.error("网页发生变化！")
    raise Exception("网页发生变化！")

logger.info(check_res)
# Hit the keyword-suggestion endpoint first (mirrors browser behaviour),
# then fetch the company search-result list for the queried name.
literal_res = check_req.make_request(literal_url, data=check_data, method='post', timeout=20)
com_list_html = check_req.make_request(com_list_url, data=check_data, method='post', timeout=20)

com_list_html = com_list_html.content
# (Removed a no-op `re.findall('', com_list_html)` left over from debugging.)

# Each search hit renders an onclick="openEntInfo('...', '...', ...)" call;
# capture its argument list.
com_list = re.findall(r'onclick="openEntInfo\((.*?)\);', com_list_html)
if com_list:
    # Strip quotes and spaces, then split the first hit's argument list.
    # The indices used later suggest 1 = entId, 2 = entNo, 3 = credit_ticket
    # — TODO confirm against the live page markup.
    com_temp = com_list[0].replace("'", "").replace(" ", "").split(',')
else:
    # BUG FIX: the original only logged and fell through, leaving `com_temp`
    # undefined and crashing later with a NameError; fail fast instead.
    logger.info("搜索的公司不存在！")
    raise Exception("搜索的公司不存在！")

# The result page embeds a fresh currentTimeMillis token for the next captcha.
var_current = re.findall('var currentTimeMillis = (.*?);', com_list_html)
var_current = var_current[0].replace("'", "") if var_current else None

print 'var_current', var_current

if var_current:
    if check_code_servlet_name:
        # Character-captcha site variant ("bj1")
        img_url = 'http://qyxy.baic.gov.cn/CheckCodeCaptcha?currentTimeMillis=' + var_current
    else:
        # Arithmetic-captcha site variant ("bj")
        img_url = 'http://qyxy.baic.gov.cn/CheckCodeYunSuan?currentTimeMillis=' + var_current
else:
    raise Exception("网页发生变化！")

# Touch the new captcha URL (response discarded) — presumably keeps session
# state consistent with a real browser; TODO confirm it is actually required.
print img_req.make_request(img_url, timeout=200)

# import pdb
# pdb.set_trace()


# Sample detail-page URLs kept for reference. FIX: they used to be bare string
# literals (no-op expression statements); turned into comments so they read as
# documentation rather than code:
# '/qynb/entinfoAction!qyxx.dhtml?entId=20e38b8c36c416060136d838f8646ac9&entName=北京百度糯米信息技术有限公司&entNo=110000450203508&str=2&timeStamp=0B1A22C1DBD8ADBFA4564F4F7559878E'
# 'http://qyxy.baic.gov.cn/gjjbj/gjjQueryCreditAction!openEntInfo.dhtml?entId=20e38b8c36c416060136d838f8646ac9&entNo=110000450203508&credit_ticket=F6B9C1C6FB4154D33F477CAC2C64390B&str=1&timeStamp=1447809067803'


# Basic company information
base_url = 'http://qyxy.baic.gov.cn/gjjbj/gjjQueryCreditAction!openEntInfo.dhtml?'
# Shareholders / investors
share_holder_url = 'http://qyxy.baic.gov.cn/gjjbj/gjjQueryCreditAction!tzrFrame.dhtml?'
# Change (alteration) records
alter_url = 'http://qyxy.baic.gov.cn/gjjbj/gjjQueryCreditAction!biangengFrame.dhtml?'
# Liquidation (second page)
liquidation_url = 'http://qyxy.baic.gov.cn/gjjbj/gjjQueryCreditAction!qsxxFrame.dhtml?'
# Key personnel
person_url = 'http://qyxy.baic.gov.cn/gjjbj/gjjQueryCreditAction!zyryFrame.dhtml?'
# Spot checks / inspections
checkMessage_url = 'http://qyxy.baic.gov.cn/gsgs/gsxzcfAction!list_ccjcxx.dhtml?'
# Abnormal business operations
abnormal_operation_url = 'http://qyxy.baic.gov.cn/gsgs/gsxzcfAction!list_jyycxx.dhtml?'

# asic_req = check_req


# Headers prepared for the detail-page requests.
# NOTE(review): this dict is never applied — `asic_req` below reuses
# `check_req` with check_headers still set; confirm whether that is intended.
asic_headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Connection': 'keep-alive',
    'Cookie': index_set_cookie,
    'Host': 'qyxy.baic.gov.cn',
    'Referer': 'http://qyxy.baic.gov.cn/gjjbj/gjjQueryCreditAction!getBjQyList.dhtml',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36'
}

# from request_util import RequestUtil as RequestUtil2
asic_req = check_req  # keep riding the already-verified session
# asic_req = RequestUtil()
# asic_req.set_hreaders(asic_headers)
# asic_req = RequestUtil2(asic_headers)
# com_base_headers = check_headers
# time.sleep(10)

import urllib2  # NOTE(review): duplicate of the top-of-file import; no-op

# Query-string fields for the basic-information page. Indices into com_temp
# presumably map 1 = entId, 2 = entNo, 3 = credit_ticket — TODO confirm.
com_base_data = {
    'entId': com_temp[1],
    'credit_ticket': com_temp[3],
    'entNo': com_temp[2],
    'timeStamp': int(round(time.time() * 1000)),  # ms timestamp cache-buster
}
print com_base_data

import cookielib

# Parallel attempt at the same request via urllib2 with a CookieJar, manually
# replaying the session cookie. The result `res` is fetched but not used
# afterwards — this appears to be an experiment kept for comparison.
cookiejar = cookielib.CookieJar()
urlopener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))
urllib2.install_opener(urlopener)
urlopener.addheaders.append(('Cookie', index_set_cookie))
urlopener.addheaders.append(('Host', 'qyxy.baic.gov.cn'))
urlopener.addheaders.append(('Referer', 'http://qyxy.baic.gov.cn/gjjbj/gjjQueryCreditAction!getBjQyList.dhtml'))
# urlopener.addheaders.append(('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36'))

# Build the same openEntInfo URL by hand and fetch it.
lib2_url = '%sentId=%s&credit_ticket=%s&entNo=%s&timeStamp=%s' % (
base_url, com_temp[1], com_temp[3], com_temp[2], int(round(time.time() * 1000)))
print lib2_url
res = urllib2.urlopen(urllib2.Request(lib2_url)).read()



# import pdb
# pdb.set_trace()

base_res = asic_req.make_request(base_url, data=com_base_data, method='get').content
print 'base_res', BeautifulSoup(base_res, 'html5lib')
# BeautifulSoup(base_res, 'html5lib')
#
#
# # time.sleep(1)
# alter_data = {
#     'ent_id':com_temp[1],
#     'clear':'true',
#     'timeStamp':int(round(time.time()*1000)),
# }
# alter_res = asic_req.make_request(alter_url, data=alter_data, method='get').content
# print 'alter_res',alter_res
#
#
# # time.sleep(1)
# share_holder_data = {
#     'ent_id':com_temp[1],
#     'entName':'',
#     'clear':'true',
#     'timeStamp':int(round(time.time()*1000)),
# }
# share_holder_res = asic_req.make_request(share_holder_url, data=share_holder_data, method='get').content
# print 'share_holder_res',share_holder_res
#
# base_res = asic_req.make_request(base_url, data=com_base_data, method='get').content
# print 'base_res',BeautifulSoup(base_res, 'html5lib')
# BeautifulSoup(base_res, 'html5lib')
#
#
#
# liquidation_res = asic_req.make_request(liquidation_url, data=share_holder_data).content
# print 'liquidation_res',liquidation_res
# person_res = asic_req.make_request(person_url, data=share_holder_data).content
# print 'person_res',person_res
# checkMessage_res = asic_req.make_request(checkMessage_url, data=share_holder_data).content
# print 'checkMessage_res',checkMessage_res
#
# abnormal_operation_data = {
#     'ent_id':com_temp[1],
#     'clear':'true',
#     'timeStamp':int(round(time.time()*1000)),
# }
#
# time.sleep(1.333)
# abnormal_operation_res = asic_req.make_request(abnormal_operation_url, data=abnormal_operation_data).content
# print 'abnormal_operation_res',abnormal_operation_res

import pdb

pdb.set_trace()
